import ma.const

import random
import os

# Identifier of the job; it names the output directory and is passed to the
# input-filename lookup.
job_id = 1

# Factors whose product gives the number of input files to generate.
no_of_nodes = 6
task_capacity = 4
map_steps = 3
# Divisor applied to the file count when sizing the key space
# (presumably the number of map outputs feeding one reduce input -- confirm).
map_to_reduce_inps = 4
# Number of letters in each generated base word.
key_size = 6
no_of_files = no_of_nodes * task_capacity * map_steps
# Target size of each generated file, counted in characters written.
file_size = 32 * 1024 * 1024
# Floor division keeps this an int so it can be handed to range(); true
# division would yield a float under Python 3.
no_of_keys = (no_of_files // map_to_reduce_inps) * 1000
# Inclusive upper bound of the random weight assigned to each key.
max_possible_weight = 20

# Base directory for generated datasets; the job id is appended to it.
main_dest_dir = "/state/partition1/datasets/"

# Number of words written to the file currently being generated.
words_inserted = 0

def get_cycled_word(i, no_letters):
    """Return the fixed-width, base-26 spelling of ``i`` using 'a'..'z'.

    The word is ``no_letters`` characters long, left-padded with 'a'
    (the zero digit), with the least significant digit on the right.
    For example index 0 gives 'aaa', 1 gives 'aab' and 26 gives 'aba'
    when ``no_letters`` is 3.

    Args:
        i: Non-negative integer index to encode. Values <= 0 yield a word
            made entirely of 'a'.
        no_letters: Width of the returned word.

    Returns:
        A string of exactly ``no_letters`` lowercase letters.

    Raises:
        ValueError: If ``i`` needs more than ``no_letters`` base-26 digits.
    """
    letters = ['a'] * no_letters
    pos = 0
    while i > 0:
        # Check before writing so an index that exactly fills the word is
        # still accepted; only a genuine overflow is rejected.
        if pos >= no_letters:
            raise ValueError(
                'index does not fit in %d letters' % no_letters)
        # Integer divmod keeps i an int; true division would turn it into a
        # float and break chr() on the next digit.
        i, digit = divmod(i, 26)
        letters[no_letters - 1 - pos] = chr(ord('a') + digit)
        pos += 1
    return ''.join(letters)

# (word, weight) pairs filled in by generate_keys().
keys = []


def generate_keys():
    """Fill the module-level ``keys`` list with weighted key words.

    For every index below ``no_of_keys`` a base word of ``key_size``
    letters is built with ``get_cycled_word`` and repeated three times,
    joined by dots. Each key is paired with a random integer weight
    between 1 and ``max_possible_weight`` inclusive. Entries are appended,
    so calling this more than once adds duplicates.
    """
    # One seed from system entropy is enough; reseeding on every iteration
    # adds cost without improving the randomness.
    random.seed()
    # int() guards against a float count produced by true division.
    for idx in range(int(no_of_keys)):
        word = get_cycled_word(idx, key_size)
        weight = random.randint(1, max_possible_weight)
        keys.append(('.'.join((word, word, word)), weight))


def make_list():
    """Expand the weighted ``keys`` into a flat list of words.

    Each entry's word (element 0) appears as many times in a row as its
    weight (element 1), in the same order as ``keys``, so a uniform pick
    from the result favours heavier words.

    Returns:
        A new list of words; empty when ``keys`` is empty.
    """
    weighted_words = []
    for entry in keys:
        word, copies = entry[0], entry[1]
        weighted_words.extend([word] * copies)
    return weighted_words


if __name__ == '__main__':
    # Build the weighted vocabulary once, then write `no_of_files` files of
    # space-separated words drawn from it into <main_dest_dir><job_id>/.
    generate_keys()
    print('No of keys', len(keys))
    # Named so that the builtins `list` and `dir` are not shadowed.
    choice_list = make_list()
    print('Length choice list:', len(choice_list))
    dest_dir = main_dest_dir + str(job_id)

    # ensure the destination directory exists
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    separator = ' '
    for file_no in range(no_of_files):
        # The file name comes from the project's ma.const helper
        # (presumably a per-job, per-file naming pattern -- confirm).
        filename = ma.const.JobsXmlData.get_str_data(ma.const.xml_map_input_filename, job_id, file_no)
        filepath = dest_dir + os.sep + filename

        print(filepath)
        # Fresh seed per file; reseeding inside the write loop is unnecessary.
        random.seed()

        curr_file_size = 0
        words_inserted = 0
        # The context manager closes the file even if a write fails; the
        # file is only written, so plain 'w' is sufficient.
        with open(filepath, 'w') as fd:
            # Size is counted in characters written, separator included, so
            # the file may overshoot `file_size` by at most one word.
            while curr_file_size < file_size:
                word = random.choice(choice_list) + separator
                fd.write(word)
                curr_file_size += len(word)
                words_inserted += 1

        print('File output', filename, 'Word count', words_inserted)
        